Link at https://docs.google.com/document/d/1bdNeOMAYY90k8FAbPRWGBgS1YBEdP09kP0vcW0tNgPc/edit?usp=sharing
european <- read_csv("01-cleaning_data_data/european_recoded.csv")
australian <- read_csv("01-cleaning_data_data/australian_recoded.csv")
dim(european)
[1] 209 117
dim(australian)
[1] 269 146
european$EU <- 1
australian$AU <- 1
all <- merge(european,australian,all = TRUE)
table(all$EU,all$AU,useNA = "always")
1 <NA>
1 0 209
<NA> 269 0
demographics_var <- c("Age","Gender","L1","speak.other.L2","study.other.L2","origins","year.studyL2","other5.other.ways","degree","roleL2.degree","study.year","prof","L2.VCE","uni1.year","Context")
l2School <- "\\.L2school$"
l2School_variables <- colnames(all)[grep(l2School,colnames(all))]
#table(all$L1,all$Context) # too many levels - needs to be cleaned (ex tot number of languages?)
table(all$L1,useNA = "always")
Afrikaans Albanian Burmese Cantonese
1 2 1 4
Chinese Croatian Dutch English
7 1 1 201
English and Dutch German German and English German and Turkish
2 77 2 1
I Indonesian Italian Japanese
1 1 89 1
Mandarin Persian Persian (Farsi) Romanian
5 1 1 3
Russian Sindhi Slovak Spanish
2 1 1 3
Turkish Ukrainian <NA>
1 1 67
ggplot(all,aes(x=L1,fill=Context)) + geom_bar() + coord_flip() + ggtitle("First Language") + labs(y="N. of participants",x="")+theme_bw()
#table(all$speak.other.L2,all$Context)
L2 <- data.frame(Freq=table(all$speak.other.L2)[order(table(all$speak.other.L2),decreasing = TRUE)],
L2=names(table(all$speak.other.L2))[order(table(all$speak.other.L2),decreasing = TRUE)]) # too many levels - needs to be cleaned (ex tot number of languages?)
head(L2)
table(all$origins,useNA = "always")
No Yes <NA>
323 89 66
table(all$year.studyL2)
0 years 1- 3 years 1-3 years
69 12 11
4-6 years First year of primary school Kindergarten
61 80 30
Less than a year more than 6 years Other
27 49 72
table(all$degree)
BA in Anglistik BA in Nordamerikastudien
43 4
HUM HUM.SCI
129 6
LA Lingue e letterature straniere
36 82
Lingue, mercati e culture dell'Asia QC
13 5
SCI
86
all$study.year[is.na(all$study.year)] <- all$uni1.year[is.na(all$study.year)]
#table(all$study.year)
all$study.year <- ifelse(all$study.year == "Already graduated after 5 semesters in March 2016, was interested in survery/study, sorry.","6th semester",all$study.year)
table(all$study.year)
1st semester 1st year 2nd semester 2nd year
72 255 5 37
3rd semester 3rd year 3rd year of Master 4th year bachelor
5 20 1 8
5th semester 6th semester 7th year Master
2 3 1 3
table(all$prof,useNA = "always")
Advanced Elementary Intermediate Upper-intermediate
78 105 84 146
<NA>
65
all$study.year[is.na(all$study.year)] <- all$uni1.year[is.na(all$study.year)]
# Filter only subject that we want to include in the study
# names(table(all$study.year))[1] = 1st semester"
filtered <- subset(all, (study.year == "1st year") | (study.year == names(table(all$study.year))[1]))
#& year.studyL2 != "0 years"
filtered$degree[filtered$Resp.ID %in% "5313976716"] <- "SCI"
filtered$degree[filtered$Resp.ID %in% "5359866545"] <- "HUM.SCI"
filtered$degree[filtered$Resp.ID %in% "5375370122"] <- "SCI"
filtered$degree[filtered$Resp.ID %in% "5375376761"] <- "HUM"
table(filtered$degree)
BA in Anglistik BA in Nordamerikastudien
39 4
HUM HUM.SCI
98 6
LA Lingue e letterature straniere
27 78
Lingue, mercati e culture dell'Asia SCI
13 58
kable(table(filtered$Context))
|Var1 | Freq|
|:--------------------|----:|
|English in Germany | 72|
|English in Italy | 91|
|German in Australia | 89|
|Italian in Australia | 75|
kable(table(filtered$year.studyL2,filtered$Context))
| | English in Germany| English in Italy| German in Australia| Italian in Australia|
|:----------------------------|------------------:|----------------:|-------------------:|--------------------:|
|0 years | 0| 0| 22| 11|
|1- 3 years | 0| 0| 0| 9|
|1-3 years | 0| 0| 7| 0|
|4-6 years | 0| 0| 35| 20|
|First year of primary school | 17| 56| 0| 0|
|Kindergarten | 6| 23| 0| 0|
|Less than a year | 0| 0| 13| 5|
|more than 6 years | 0| 0| 11| 30|
|Other | 48| 12| 0| 0|
kable(table(filtered$year.studyL2,filtered$prof))
| | Advanced| Elementary| Intermediate| Upper-intermediate|
|:----------------------------|--------:|----------:|------------:|------------------:|
|0 years | 0| 32| 1| 0|
|1- 3 years | 0| 4| 5| 0|
|1-3 years | 0| 1| 3| 3|
|4-6 years | 1| 9| 26| 19|
|First year of primary school | 20| 1| 6| 46|
|Kindergarten | 8| 1| 3| 17|
|Less than a year | 0| 11| 4| 3|
|more than 6 years | 2| 5| 16| 18|
|Other | 33| 0| 5| 22|
kable(table(filtered$year.studyL2,filtered$Context))
| | English in Germany| English in Italy| German in Australia| Italian in Australia|
|:----------------------------|------------------:|----------------:|-------------------:|--------------------:|
|0 years | 0| 0| 22| 11|
|1- 3 years | 0| 0| 0| 9|
|1-3 years | 0| 0| 7| 0|
|4-6 years | 0| 0| 35| 20|
|First year of primary school | 17| 56| 0| 0|
|Kindergarten | 6| 23| 0| 0|
|Less than a year | 0| 0| 13| 5|
|more than 6 years | 0| 0| 11| 30|
|Other | 48| 12| 0| 0|
People that have studied 0 years L2 are just a small subset of the German in Australia and Italian in Australia context which means that by correcting for context we are not removing the effect of the 0 years. A way to remove the effect of the 0 years participants and not including too many variables could be to estimate the effect of 0 years vs all.
all <- filtered
demographics_var <- c("Age","Gender","L1","speak.other.L2","study.other.L2","origins","year.studyL2","other5.other.ways","degree","roleL2.degree","study.year","prof","L2.VCE","uni1.year","Context")
l2School <- "\\.L2school$"
l2School_variables <- colnames(all)[grep(l2School,colnames(all))]
ggplot(all,aes(x=L1,fill=Context)) + geom_bar() + coord_flip() + ggtitle("First Language") + labs(y="N. of participants",x="") + theme_bw()
table(all$L1,all$Context)
English in Germany English in Italy German in Australia
Afrikaans 0 0 1
Albanian 0 1 0
Cantonese 0 0 2
Chinese 0 2 2
Dutch 1 0 0
English 1 0 75
English and Dutch 0 0 2
German 64 0 0
German and English 1 0 1
I 0 0 0
Indonesian 0 0 1
Italian 0 87 0
Japanese 0 0 1
Mandarin 0 0 1
Persian (Farsi) 0 0 1
Romanian 0 0 1
Russian 2 0 0
Sindhi 0 0 1
Spanish 1 0 0
Turkish 1 0 0
Ukrainian 0 1 0
Italian in Australia
Afrikaans 0
Albanian 0
Cantonese 0
Chinese 0
Dutch 0
English 73
English and Dutch 0
German 0
German and English 0
I 1
Indonesian 0
Italian 0
Japanese 0
Mandarin 1
Persian (Farsi) 0
Romanian 0
Russian 0
Sindhi 0
Spanish 0
Turkish 0
Ukrainian 0
table(all$degree,all$L1)
Afrikaans Albanian Cantonese Chinese Dutch English
BA in Anglistik 0 0 0 0 0 1
BA in Nordamerikastudien 0 0 0 0 0 0
HUM 1 0 2 0 0 93
HUM.SCI 0 0 0 0 0 6
LA 0 0 0 0 1 0
Lingue e letterature straniere 0 1 0 1 0 0
Lingue, mercati e culture dell'Asia 0 0 0 1 0 0
SCI 0 0 0 2 0 47
English and Dutch German German and English I
BA in Anglistik 0 34 1 0
BA in Nordamerikastudien 0 4 0 0
HUM 1 0 0 1
HUM.SCI 0 0 0 0
LA 0 25 0 0
Lingue e letterature straniere 0 0 0 0
Lingue, mercati e culture dell'Asia 0 0 0 0
SCI 1 0 1 0
Indonesian Italian Japanese Mandarin
BA in Anglistik 0 0 0 0
BA in Nordamerikastudien 0 0 0 0
HUM 0 0 0 0
HUM.SCI 0 0 0 0
LA 0 0 0 0
Lingue e letterature straniere 0 75 0 0
Lingue, mercati e culture dell'Asia 0 12 0 0
SCI 1 0 1 2
Persian (Farsi) Romanian Russian Sindhi Spanish
BA in Anglistik 0 0 2 0 1
BA in Nordamerikastudien 0 0 0 0 0
HUM 0 0 0 0 0
HUM.SCI 0 0 0 0 0
LA 0 0 0 0 0
Lingue e letterature straniere 0 0 0 0 0
Lingue, mercati e culture dell'Asia 0 0 0 0 0
SCI 1 1 0 1 0
Turkish Ukrainian
BA in Anglistik 0 0
BA in Nordamerikastudien 0 0
HUM 0 0
HUM.SCI 0 0
LA 1 0
Lingue e letterature straniere 0 1
Lingue, mercati e culture dell'Asia 0 0
SCI 0 0
#Filter by L1
nc <- names(table(all$Context))
table(all$Context)
English in Germany English in Italy German in Australia Italian in Australia
72 91 89 75
l1_filter <- all[(all$Context == nc[1] & (all$L1 == "German" | all$L1 == "German and English")) |
(all$Context == nc[2] & (all$L1 == "Italian")) |
(all$Context == nc[3] & (all$L1 == "English" | all$L1 == "English and Dutch" | all$L1 == "German and English")) |
(all$Context == nc[4] & (all$L1 == "English" | all$L1 == "English and Dutch" | all$L1 == "German and English")),]
#all <- l1_filter
# do not filter for L1
all <- all
# subset demographics
demo <- subset(all,select=c("Resp.ID",demographics_var,l2School_variables))
# Numeri finali
table(l1_filter$Context)
English in Germany English in Italy German in Australia Italian in Australia
65 87 78 73
table(all$Context)
English in Germany English in Italy German in Australia Italian in Australia
72 91 89 75
missing_bySample <- rowSums(is.na(demo))
names(missing_bySample) <- demo$Resp.ID
missing_byVar <- colSums(is.na(demo))
names(missing_byVar) <- colnames(demo)
barplot(missing_bySample)
d <- data.frame(miss=missing_byVar)
d$varID <- rownames(d)
ggplot(data=d,aes(x=varID,y=miss)) + geom_bar(stat="identity") + theme_bw() +theme(axis.text.x = element_text(angle = 45, hjust = 1))
demo_missing <- demo %>% group_by(Context) %>% summarise(roleL2.degree_na = sum(is.na(roleL2.degree)),
L2.VCE_na = sum(is.na(L2.VCE)),
other5.other.ways_na=sum(is.na(other5.other.ways )),
uni1.year_na = sum(is.na(uni1.year)),
primary1.L2school_na=sum(is.na(primary1.L2school)),
CLS3.L2school_na = sum(is.na(CLS3.L2school)),
VSL4.L2school_na=sum(is.na(VSL4.L2school)),
degree = sum(is.na(degree)),
schooL2country5.L2school_na=sum(is.na(schooL2country5.L2school)))
# We do not filter for speak.other.L2 or study.other.L2
#demo[is.na(demo$speak.other.L2),]
# teniamo
#demo[is.na(demo$study.other.L2),]
missing_bySample[names(missing_bySample) == "5166861581"]
5166861581
10
#demo[is.na(demo$year.studyL2),]
missing_bySample[names(missing_bySample) == "5378798787"]
5378798787
3
# remove NA from degree
#table(demo$degree,useNA = "always")
# Remove people
all <- all[!is.na(all$degree),]
kable(table(all$Context))
|Var1 | Freq|
|:--------------------|----:|
|English in Germany | 70|
|English in Italy | 91|
|German in Australia | 88|
|Italian in Australia | 74|
kable(table(all$study.year))
|Var1 | Freq|
|:------------|----:|
|1st semester | 70|
|1st year | 253|
kable(table(all$year.studyL2))
|Var1 | Freq|
|:----------------------------|----:|
|0 years | 33|
|1- 3 years | 9|
|1-3 years | 7|
|4-6 years | 53|
|First year of primary school | 73|
|Kindergarten | 29|
|Less than a year | 18|
|more than 6 years | 41|
|Other | 59|
recoded_dem_richi <- read_excel("02-descriptive_data/21 03 merged_filtered_imputedMedian_likertNumber.xlsx")
write.csv(all,file.path("02-descriptive_data/context-merged_filtered.csv"))
all$speak.other.L2_binary <- ifelse(!is.na(all$speak.other.L2) &
!(all$speak.other.L2 %in% c("Yes","No")),"Yes",as.character(all$speak.other.L2))
kable(table(all$speak.other.L2_binary,all$Context,useNA = "always"))
| | English in Germany| English in Italy| German in Australia| Italian in Australia| NA|
|:---|------------------:|----------------:|-------------------:|--------------------:|--:|
|No | 12| 24| 52| 53| 0|
|Yes | 57| 67| 36| 20| 0|
|NA | 1| 0| 0| 1| 0|
tabAge <- t(table(all$Age,all$Context))
ggdf <- data.frame(Age = rep(colnames(tabAge),each=4)[!(as.numeric(tabAge) == 0)],
N.Participants = as.numeric(tabAge)[!(as.numeric(tabAge) == 0)],
Context = rep(rownames(tabAge),times=3)[!(as.numeric(tabAge) == 0)])
ggplot(ggdf,aes(x=Age,y=N.Participants,fill=Context)) + geom_bar(position="dodge",colour="white",stat="identity") + scale_y_continuous(breaks=seq(0,90,10),limits=c(0,90)) + theme_bw() + ggtitle("Participants by age")+
geom_text(aes(label = N.Participants), hjust=0.5, vjust=-0.25, size = 2.5,position=position_dodge(width=0.9))
# add numbers on the bar
tabAge <- t(table(all$Gender,all$Context))
ggdf <- data.frame(Gender = rep(colnames(tabAge),each=4)[!(as.numeric(tabAge) == 0)],
N.Participants = as.numeric(tabAge)[!(as.numeric(tabAge) == 0)],
Context = rep(rownames(tabAge),times=3)[!(as.numeric(tabAge) == 0)])
ggplot(ggdf,aes(x=Gender,y=N.Participants,fill=Context)) + geom_bar(position="dodge",colour="white",stat="identity") + labs(y="N participants") + scale_y_continuous(breaks=seq(0,90,10),limits=c(0,90)) + theme_bw() + ggtitle("Participants by gender")+ geom_text(aes(label = N.Participants), hjust=0.5, vjust=-0.25, size = 2.5,position=position_dodge(width=0.9))
# add numbers on the bar
tabAge <- t(table(all$origins,all$Context))
ggplot(all,aes(x=origins,fill=Context)) + geom_bar(position="dodge",colour="white") + ggtitle("Origins by context") + scale_y_continuous(breaks=seq(0,90,10),limits=c(0,90)) + theme_bw() + draw_grob(tableGrob(tabAge), x=2, y=60, width=0.3, height=0.4) + ggtitle("Participants by origins")
tabAge
No Yes
English in Germany 65 5
English in Italy 90 1
German in Australia 63 25
Italian in Australia 36 38
tabAge <- t(table(all$prof,all$Context))
ggplot(all,aes(x=Context,fill=prof)) + geom_bar(position="dodge",colour="white") + ggtitle("Proficiency by context") + scale_y_continuous(breaks=seq(0,90,10),limits=c(0,90)) + theme_bw() + draw_grob(tableGrob(tabAge), x=2, y=80, width=0.3, height=0.4)
tabAge
Advanced Elementary Intermediate Upper-intermediate
English in Germany 38 0 5 27
English in Italy 23 2 9 57
German in Australia 4 32 25 27
Italian in Australia 0 29 29 16
tabAge <- t(table(all[all$Context != "English in Germany" & all$Context != "English in Italy","L2.VCE"],all[all$Context != "English in Germany" & all$Context != "English in Italy",'Context'],useNA = "always"))
tabAge <- tabAge[-3,]
ggplot(all[all$Context != "English in Germany" & all$Context != "English in Italy",],aes(x=Context,fill=L2.VCE)) + geom_bar(position="dodge",colour="white") + ggtitle("L2.VCE by context") + scale_y_continuous(breaks=seq(0,90,10),limits=c(0,90)) + theme_bw() + draw_grob(tableGrob(tabAge), x=2, y=80, width=0.3, height=0.4)
# year study L2
table(all$year.studyL2,all$other.year.studyL2.richi)
BILINGUAL FIRST.YEAR.SECONDARY FOURTH.YEAR.PRIMARY
0 years 0 0 0
1- 3 years 0 0 0
1-3 years 0 0 0
4-6 years 0 0 0
First year of primary school 0 0 0
Kindergarten 0 0 0
Less than a year 0 0 0
more than 6 years 0 0 0
Other 4 10 5
LOWER.SECONDARY PERSONAL SECOND.YEAR.PRIMARY
0 years 0 0 0
1- 3 years 0 0 0
1-3 years 0 0 0
4-6 years 0 0 0
First year of primary school 0 0 0
Kindergarten 0 0 0
Less than a year 0 0 0
more than 6 years 0 0 0
Other 4 2 2
SECOND.YEAR.SECONDARY THIRD.YEAR.PRIMARY
0 years 0 0
1- 3 years 0 0
1-3 years 0 0
4-6 years 0 0
First year of primary school 0 0
Kindergarten 0 0
Less than a year 0 0
more than 6 years 0 0
Other 2 28
all$year.studyL2 <- ifelse(all$year.studyL2 == "Other",all$other.year.studyL2.richi,all$year.studyL2 )
# European context
ggplot(all[all$Context == "English in Germany" | all$Context == "English in Italy",],aes(x=degree,fill=year.studyL2)) + geom_bar(position="dodge",colour="white") + theme_bw() + ggtitle("Degree by study year L2, by Context") + facet_grid(~Context,scales="free") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(y = "N participants", x = "degree")
# Australian context
tabAge <- t(table(all[all$Context == "Italian in Australia" | all$Context == "German in Australia",'degree'],all[all$Context == "Italian in Australia" | all$Context == "German in Australia",'Context']))
ggplot(all[all$Context == "Italian in Australia" | all$Context == "German in Australia",],aes(x=Context,fill=degree)) + geom_bar(position="dodge",colour="white") + theme_bw() + ggtitle("Degree in Australian Contexts") + draw_grob(tableGrob(tabAge), x=1., y=40, width=0.3, height=0.4)
tabAge
HUM HUM.SCI SCI
German in Australia 48 4 36
Italian in Australia 50 2 22
# Australian context
tabAge <- t(table(all[all$Context == "English in Italy" | all$Context == "English in Germany",'degree'],all[all$Context == "English in Italy" | all$Context == "English in Germany",'Context']))
ggplot(all[all$Context == "English in Italy" | all$Context == "English in Germany",],aes(x=Context,fill=degree)) + geom_bar(position="dodge",colour="white") + theme_bw() + ggtitle("Degree in European Contexts")
tabAge
BA in Anglistik BA in Nordamerikastudien LA
English in Germany 39 4 27
English in Italy 0 0 0
Lingue e letterature straniere Lingue, mercati e culture dell'Asia
English in Germany 0 0
English in Italy 78 13
kable(table(all$reconnect.comm,all$Context))
| | English in Germany| English in Italy| German in Australia| Italian in Australia|
|:-----------------|------------------:|----------------:|-------------------:|--------------------:|
|Agree | 0| 0| 8| 11|
|Disagree | 0| 0| 35| 14|
|Not sure | 0| 0| 3| 4|
|Strongly agree | 0| 0| 12| 28|
|Strongly disagree | 0| 0| 30| 17|
kable(table(all$speakersmelb.comm,all$Context))
| | English in Germany| English in Italy| German in Australia| Italian in Australia|
|:-----------------|------------------:|----------------:|-------------------:|--------------------:|
|Agree | 0| 0| 44| 41|
|Disagree | 0| 0| 6| 2|
|Not sure | 0| 0| 25| 12|
|Strongly agree | 0| 0| 12| 19|
|Strongly disagree | 0| 0| 1| 0|
kable(table(all$comecloser.comm,all$Context))
| | English in Germany| English in Italy| German in Australia| Italian in Australia|
|:-----------------|------------------:|----------------:|-------------------:|--------------------:|
|Agree | 0| 0| 21| 34|
|Disagree | 0| 0| 16| 6|
|Not sure | 0| 0| 43| 17|
|Strongly agree | 0| 0| 6| 17|
|Strongly disagree | 0| 0| 2| 0|
convertToNumber <- function(column){
column <- factor(column,levels = c("Strongly disagree","Disagree","Not sure","Agree","Strongly agree"))
column_number <- as.numeric(column)
return(column_number)
}
table(all$Context)
English in Germany English in Italy German in Australia Italian in Australia
70 91 88 74
table(all$study.year)
1st semester 1st year
70 253
convert_likert <- data.frame(apply(subset(all,select=likert_variables_all),2,convertToNumber))
colnames(convert_likert) <- paste0(colnames(convert_likert),"1")
likert_variables1 <- paste0(likert_variables_all,"1")
# join the converted variables to the filtered dataset
filtered_conv <- cbind(all,convert_likert)
table(filtered_conv[,likert_variables_all[4]],filtered_conv[,likert_variables1[4]],useNA = "always")
1 2 3 4 5 <NA>
Agree 0 0 0 121 0 0
Disagree 0 10 0 0 0 0
Not sure 0 0 39 0 0 0
Strongly agree 0 0 0 0 152 0
Strongly disagree 1 0 0 0 0 0
<NA> 0 0 0 0 0 0
write.csv(filtered_conv,"02-descriptive_data/merged_filtered_likertNumber.csv",row.names = FALSE)
The missing values appears to be at random and there are max two missing values in one variable (see plots below). In order not to loose 12 participants while doing the factor analysis across contexts it is preferable to impute the 12 missing values.
all <- filtered_conv
# Items to use for factor analysis : items shared between contexts
# items to be used for the FA
usable_items <- likert_variables1[!(likert_variables1 %in% c("necessity1","educated1","reconnect.comm1", "speakersmelb.comm1", "comecloser.comm1"))]
rownames(all) <- all$Resp.ID
usable_data_context <- all[,c(usable_items,"Context")]
dat_noNA <- usable_data_context[rowSums(is.na(usable_data_context)) == 0,]
all_noNA <- all[rowSums(is.na(usable_data_context)) == 0,]
table(rowSums(is.na(usable_data_context)))
0 1
311 12
# Participants with NA to remove
table(rowSums(is.na(usable_data_context)),usable_data_context$Context,useNA = "always")
English in Germany English in Italy German in Australia Italian in Australia <NA>
0 70 85 87 69 0
1 0 6 1 5 0
<NA> 0 0 0 0 0
# Variable missing values
table(colSums(is.na(usable_data_context)))
0 1 2
19 10 1
table(rowSums(is.na(usable_data_context)),usable_data_context$Context,useNA = "always")
English in Germany English in Italy German in Australia Italian in Australia <NA>
0 70 85 87 69 0
1 0 6 1 5 0
<NA> 0 0 0 0 0
# check what to use to impute
# have a look at the distribution of missing values
library(mice)
Loading required package: lattice
Attaching package: ‘mice’
The following object is masked from ‘package:tidyr’:
complete
The following objects are masked from ‘package:base’:
cbind, rbind
library(VIM)
Loading required package: colorspace
Loading required package: grid
Loading required package: data.table
data.table 1.11.4 Latest news: http://r-datatable.com
Attaching package: ‘data.table’
The following objects are masked from ‘package:reshape2’:
dcast, melt
The following objects are masked from ‘package:dplyr’:
between, first, last
The following object is masked from ‘package:purrr’:
transpose
VIM is ready to use.
Since version 4.0.0 the GUI is in its own package VIMGUI.
Please use the package to use the new (and old) GUI.
Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
Attaching package: ‘VIM’
The following object is masked from ‘package:datasets’:
sleep
mice_plot <- aggr(usable_data_context[,usable_items], col=c('navyblue','yellow'),
numbers=TRUE, sortVars=TRUE,
labels=names(usable_data_context[,usable_items]), cex.axis=.4,
gap=1, ylab=c("Missing data","Pattern"),cex.numbers=0.5)
Variables sorted by number of missings:
# Imputing using median
library(Hmisc)
Loading required package: survival
Loading required package: Formula
Attaching package: ‘Hmisc’
The following object is masked from ‘package:sjmisc’:
%nin%
The following object is masked from ‘package:psych’:
describe
The following objects are masked from ‘package:dplyr’:
src, summarize
The following objects are masked from ‘package:base’:
format.pval, units
imputedMedian <- usable_data_context
imputedMedian$globalaccess.post1 <- with(imputedMedian[,usable_items], impute(globalaccess.post1, median))
imputedMedian$citizen.post1 <- with(imputedMedian[,usable_items], impute(citizen.post1, median))
imputedMedian$money.instru1 <- with(imputedMedian[,usable_items], impute(money.instru1, median))
imputedMedian$knowledge.instru1 <- with(imputedMedian[,usable_items], impute(knowledge.instru1, median))
imputedMedian$life.intr1 <- with(imputedMedian[,usable_items], impute(life.intr1, median))
imputedMedian$time.integr1 <- with(imputedMedian[,usable_items], impute(time.integr1, median))
imputedMedian$expect.ought1 <- with(imputedMedian[,usable_items], impute(expect.ought1, median))
imputedMedian$job.instru1 <- with(imputedMedian[,usable_items], impute(job.instru1, median))
imputedMedian$career.instru1 <- with(imputedMedian[,usable_items], impute(career.instru1, median))
imputedMedian$meeting.integr1 <- with(imputedMedian[,usable_items], impute(meeting.integr1, median))
imputedMedian$interact.post1 <- with(imputedMedian[,usable_items], impute(interact.post1, median))
# check before after
table(imputedMedian$time.integr1)
2 3 4 5
3 21 95 204
table(usable_data_context$time.integr1)
2 3 4 5
3 21 95 202
table(imputedMedian$life.intr1)
1 2 3 4 5
8 79 81 117 38
table(usable_data_context$life.intr1)
1 2 3 4 5
8 79 80 117 38
table(imputedMedian$knowledge.instru1)
1 2 3 4 5
1 2 29 189 102
table(usable_data_context$knowledge.instru1)
1 2 3 4 5
1 2 29 188 102
table(imputedMedian$money.instru1)
1 2 3 4 5
3 38 179 84 19
table(usable_data_context$money.instru1)
1 2 3 4 5
3 38 178 84 19
table(imputedMedian$citizen.post1)
1 2 3 4 5
3 22 75 148 75
table(usable_data_context$citizen.post1)
1 2 3 4 5
3 22 75 147 75
table(imputedMedian$globalaccess.post1)
1 2 3 4 5
1 3 20 159 140
table(usable_data_context$globalaccess.post1)
1 2 3 4 5
1 3 20 158 140
table(imputedMedian$expect.ought1)
1 2 3 4 5
126 142 30 21 4
table(usable_data_context$expect.ought1)
1 2 3 4 5
126 141 30 21 4
table(imputedMedian$job.instru1)
2 3 4 5
13 103 133 74
table(usable_data_context$job.instru1)
2 3 4 5
13 103 132 74
table(imputedMedian$career.instru1)
1 2 3 4 5
1 1 63 131 127
table(usable_data_context$career.instru1)
1 2 3 4 5
1 1 63 130 127
table(imputedMedian$meeting.integr1)
2 3 4 5
1 10 121 191
table(usable_data_context$meeting.integr1)
2 3 4 5
1 10 121 190
table(imputedMedian$interact.post1)
2 3 4 5
1 19 140 163
table(usable_data_context$interact.post1)
2 3 4 5
1 19 140 162
all <- all[,!(colnames(all) %in% usable_items)]
imputedMedian$Context <- NULL
sum(!(colnames(imputedMedian) %in% usable_items))
[1] 0
all <- cbind(all,imputedMedian[match(rownames(imputedMedian),all$Resp.ID),])
Add some updates that Richi did in Date 7th June 2018
> Other_ways_and_degree_role_with_respondent_IDs <- read_excel("Other-ways-and-degree-role-with-respondent-IDs.xlsx")
> sum(Other_ways_and_degree_role_with_respondent_IDs$Resp.ID != Other_ways_and_degree_role_with_respondent_IDs$Resp.ID__1)
> # to replace
> # match for the NA degree.role
>
> match_updates <- match(all$Resp.ID,Other_ways_and_degree_role_with_respondent_IDs$Resp.ID)
> all$private.lessons1.other.ways[match_updates] <- Other_ways_and_degree_role_with_respondent_IDs$private.lessons1.other.ways
> all$study.holiday2.other.ways[match_updates] <- Other_ways_and_degree_role_with_respondent_IDs$study.holiday2.other.ways
> all$year.sem.abroad3.other.ways[match_updates] <- Other_ways_and_degree_role_with_respondent_IDs$year.sem.abroad3.other.ways
> all$online.course4.other.ways[match_updates] <- Other_ways_and_degree_role_with_respondent_IDs$online.course4.other.ways
> all$other5.other.ways[match_updates] <- Other_ways_and_degree_role_with_respondent_IDs$other5.other.ways
> all$degree.role[match_updates] <- Other_ways_and_degree_role_with_respondent_IDs$degree.role
write.csv(all,"02-descriptive_data/merged_filtered_imputedMedian_likertNumber.csv",row.names = FALSE)
all_melt <- melt(all,id.vars = c("Resp.ID","Gender","Age","prof","Context","study.year"),
measure.vars = likert_variables1)
attributes are not identical across measure variables; they will be dropped
all_melt$value <- factor(all_melt$value,levels=c(1,2,3,4,5),labels=c("Strongly disagree","Disagree","Not sure","Agree","Strongly agree"))
# dim(all_melt)
# 323*length(likert_variables1)
all_melt <- all_melt %>% separate(variable,into=c("item","type"),sep="\\.",remove=FALSE)
Expected 2 pieces. Missing pieces filled with `NA` in 646 rows [9368, 9369, 9370, 9371, 9372, 9373, 9374, 9375, 9376, 9377, 9378, 9379, 9380, 9381, 9382, 9383, 9384, 9385, 9386, 9387, ...].
ggplot(all_melt,aes(x=variable,fill=value)) + geom_bar(position = "stack",colour="black") +
facet_grid(Context~type,scales = "free")+theme(axis.text.x = element_text(angle = 45, hjust = 1),axis.text=element_text(size=8)) + ggtitle("Filtered dataset") + scale_fill_manual(values=c("#ca0020","#f4a582","#ffffbf","#abd9e9","#2c7bb6","grey"))
filt_sum <- all_melt %>% group_by(Context,variable,type,value) %>% dplyr::summarise(Ngroup=length(value))
ggplot(filt_sum,aes(x=value,y=Ngroup,colour=Context,group=interaction(variable, Context))) + geom_line() + geom_point() + facet_wrap(~type,scales = "free")+theme(axis.text.x = element_text(angle = 45, hjust = 1))
# add numbers on the bar
educated <- all[all$Context %in% c("German in Australia","Italian in Australia"),]
table(educated$educated1,educated$Context,useNA="always")
German in Australia Italian in Australia <NA>
1 11 9 0
2 25 24 0
3 12 13 0
4 29 18 0
5 11 10 0
<NA> 0 0 0
educated$educated1 <- factor(educated$educated1,levels = c(1,2,3,4,5),labels=c("Strongly disagree","Disagree","Not sure","Agree","Strongly agree"))
tabEdu <- t(table(educated$educated1,educated$Context))
ggdf <- data.frame(Educated = rep(colnames(tabEdu),each=2),
N.Participants = as.numeric(tabEdu),
Context = rep(rownames(tabEdu),times=5))
ggplot(ggdf,aes(x=Educated,y=N.Participants,fill=Context)) + geom_bar(position="dodge",colour="white",stat="identity") + labs(y="N participants") + scale_y_continuous(breaks=seq(0,35,10),limits=c(0,35)) + theme_bw() + ggtitle("Educated by Context")+ geom_text(aes(label = N.Participants), hjust=0.5, vjust=-0.25, size = 2.5,position=position_dodge(width=0.9))
ggplot(ggdf,aes(x=Context,y=N.Participants,fill=Educated)) + geom_bar(position="dodge",colour="white",stat="identity") + labs(y="N participants") + scale_y_continuous(breaks=seq(0,35,10),limits=c(0,35)) + theme_bw() + ggtitle("Educated by Context")+ geom_text(aes(label = N.Participants), hjust=0.5, vjust=-0.25, size = 2.5,position=position_dodge(width=0.9))
# add numbers on the bar
necessity <- all[all$Context %in% c("English in Germany","English in Italy"),]
table(necessity$necessity1,necessity$Context,useNA="always")
English in Germany English in Italy <NA>
1 1 12 0
2 7 16 0
3 13 6 0
4 32 36 0
5 16 20 0
<NA> 1 1 0
necessity$necessity1 <- factor(necessity$necessity1,levels = c(1,2,3,4,5),labels=c("Strongly disagree","Disagree","Not sure","Agree","Strongly agree"))
tabNec <- t(table(necessity$necessity1,necessity$Context,useNA = "always"))[-3,]
ggdf <- data.frame(Necessity = rep(colnames(tabNec),each=2),
N.Participants = as.numeric(tabNec),
Context = rep(rownames(tabNec),times=6))
ggplot(ggdf,aes(x=Necessity,y=N.Participants,fill=Context)) + geom_bar(position="dodge",colour="white",stat="identity") + labs(y="N participants") + scale_y_continuous(breaks=seq(0,40,10),limits=c(0,40)) + theme_bw() + ggtitle("Necessity by Context")+ geom_text(aes(label = N.Participants), hjust=0.5, vjust=-0.25, size = 2.5,position=position_dodge(width=0.9))
ggplot(ggdf,aes(x=Context,y=N.Participants,fill=Necessity)) + geom_bar(position="dodge",colour="white",stat="identity") + labs(y="N participants") + scale_y_continuous(breaks=seq(0,35,10),limits=c(0,35)) + theme_bw() + ggtitle("Necessity by Context")+ geom_text(aes(label = N.Participants), hjust=0.5, vjust=-0.25, size = 2.5,position=position_dodge(width=0.9))
cov <- cor(filtered_conv[filtered_conv$Context == "Italian in Australia",likert_variables1[!(likert_variables1 %in% "necessity1")]],method = "pearson",use="pairwise.complete.obs")
data_cor_ita_in_au <- data.frame(cor_ita_in_au=cov[lower.tri(cov, diag = TRUE)],
var1 = rownames(cov)[unlist(t(mapply(":", 1:nrow(cov), nrow(cov)))[1,])],
var2 = rep(colnames(cov),times=rev(seq(nrow(cov):1))))
row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
row_infos$Variables <- as.character(row_infos$Variables)
rownames(row_infos) <- rownames(cov)
row_infos$Variables[which(is.na(row_infos$Variables))] <- c("educated")
row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
ann_colors_wide <- list(Variables=c(comm1="#bd0026",educated="#b35806", id1="#f6e8c3",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
#pheatmap(cov, main = "Italian in Australia",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE], annotation_colors = ann_colors_wide,breaks=seq(-1,1,0.2),col=c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#f7f7f7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"),show_colnames = FALSE,width = 7,height = 7)
###################
diag(cov) <- NA
pheatmap(cov, main = "Italian in Australia",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
, annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))
cov_ItaAus <- cov
cov <- cor(filtered_conv[filtered_conv$Context == "German in Australia",likert_variables1[!(likert_variables1 %in% "necessity1")]],method = "pearson",use="pairwise.complete.obs")
data_cor_germ_in_au <- data.frame(cor_germ_in_au=cov[lower.tri(cov, diag = TRUE)],
var1 = rownames(cov)[unlist(t(mapply(":", 1:nrow(cov), nrow(cov)))[1,])],
var2 = rep(colnames(cov),times=rev(seq(nrow(cov):1))))
row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
row_infos$Variables <- as.character(row_infos$Variables)
rownames(row_infos) <- rownames(cov)
row_infos$Variables[which(is.na(row_infos$Variables))] <- c("educated")
row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
ann_colors_wide <- list(Variables=c(comm1="#bd0026",educated="#b35806", id1="#f6e8c3",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
diag(cov) <- NA
pheatmap(cov, main = "German in Australia",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
, annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))
#
cov_GermAus <- cov
cov <- cor(filtered_conv[filtered_conv$Context == "English in Germany",likert_variables1[!(likert_variables1 %in% c("reconnect.comm1", "speakersmelb.comm1","comecloser.comm1","educated1"))]],method = "pearson",use="pairwise.complete.obs")
data_cor_eng_in_germ <- data.frame(cor_eng_in_germ=cov[lower.tri(cov, diag = TRUE)],
var1 = rownames(cov)[unlist(t(mapply(":", 1:nrow(cov), nrow(cov)))[1,])],
var2 = rep(colnames(cov),times=rev(seq(nrow(cov):1))))
row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
row_infos$Variables <- as.character(row_infos$Variables)
rownames(row_infos) <- rownames(cov)
row_infos$Variables[which(is.na(row_infos$Variables))] <- c("necessity")
row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
ann_colors_wide <- list(Variables=c(id1="#f6e8c3",necessity="#b35806",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
diag(cov) <- NA
pheatmap(cov, main = "English in Germany",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
, annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))
cov_EngGerm <- cov
cov <- cor(filtered_conv[filtered_conv$Context == "English in Italy",likert_variables1[!(likert_variables1 %in% c("reconnect.comm1","speakersmelb.comm1","comecloser.comm1","educated1"))]],method = "pearson",use="pairwise.complete.obs")
data_cor_eng_in_ita <- data.frame(cor_eng_in_ita=cov[lower.tri(cov, diag = TRUE)],
var1 = rownames(cov)[unlist(t(mapply(":", 1:nrow(cov), nrow(cov)))[1,])],
var2 = rep(colnames(cov),times=rev(seq(nrow(cov):1))))
row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
row_infos$Variables <- as.character(row_infos$Variables)
rownames(row_infos) <- rownames(cov)
row_infos$Variables[which(is.na(row_infos$Variables))] <- "necessity"
row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
ann_colors_wide <- list(Variables=c(comm1="#bd0026",necessity="#b35806", id1="#f6e8c3",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
diag(cov) <- NA
pheatmap(cov, main = "English in Italy",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
, annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))
#
cov_EngIta <- cov
We will perform an exploratory FA combining all the contexts together. This means that we are assuming that the correlation between items across context has the same direction (does not happen that cor(item1,item2)_context1 > 0 and cor(item1,item2)_context2 < 0).
sum(rownames(common_ItaAus) != colnames(common_ItaAus))
[1] 0
cov <- cor(filtered_conv[,likert_variables1],method = "pearson",use="pairwise.complete.obs")
row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
row_infos$Variables <- as.character(row_infos$Variables)
rownames(row_infos) <- rownames(cov)
row_infos$Variables[which(is.na(row_infos$Variables))] <- c("necessity","educated")
row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
ann_colors_wide <- list(Variables=c(comm1="#bd0026",educated="orange", id1="#f6e8c3",instru1="#35978f",necessity="#b35806",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
diag(cov) <- NA
pheatmap(cov, main = "All Contexts",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
, annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))
library(GGally)
combine_cor <- merge(data_cor_eng_in_ita,data_cor_eng_in_germ,all = TRUE)
combine_cor1 <- merge(combine_cor,data_cor_germ_in_au,all = TRUE)
combine_cor2 <- merge(combine_cor1,data_cor_ita_in_au,all = TRUE)
combine_cor2 <- combine_cor2 %>% separate(var1,into=c("item1","variable1"),sep="[.]",remove=FALSE) %>%
separate(var2,into=c("item2","variable2"),sep="[.]",remove=FALSE) %>%
unite(group,variable1,variable2,sep=".")
pairs(combine_cor2[,c("cor_eng_in_ita","cor_eng_in_germ","cor_germ_in_au","cor_ita_in_au")])
ggpairs(combine_cor2[,c("cor_eng_in_ita","cor_eng_in_germ","cor_germ_in_au","cor_ita_in_au")])
We will perform an exploratory FA combining all the contexts together. This means that we are assuming that the correlation between items across context has the same direction (does not happen that cor(item1,item2)_context1 > 0 and cor(item1,item2)_context2 < 0).
common <- rownames(cov_EngIta)[rownames(cov_EngIta) %in% rownames(cov_ItaAus)]
sum(rownames(cov_EngIta) != rownames(cov_EngGerm))
sum(rownames(cov_EngIta) != rownames(cov_GermAus))
sum(rownames(cov_EngIta) != rownames(cov_ItaAus))
sum(rownames(cov_GermAus) != rownames(cov_ItaAus))
common_EngIta <- cov_EngIta[rownames(cov_EngIta) %in% common,colnames(cov_EngIta) %in% common]
common_EngGerm <- cov_EngGerm[rownames(cov_EngGerm) %in% common,colnames(cov_EngGerm) %in% common]
common_GermAus <- cov_GermAus[rownames(cov_GermAus) %in% common,colnames(cov_GermAus) %in% common]
common_ItaAus <- cov_ItaAus[rownames(cov_ItaAus) %in% common,colnames(cov_ItaAus) %in% common]
sum(rownames(common_EngIta) != colnames(common_EngIta))
sum(rownames(common_EngGerm) != colnames(common_EngGerm))
sum(rownames(common_GermAus) != colnames(common_GermAus))
sum(rownames(common_ItaAus) != colnames(common_ItaAus))
par(mfrow=c(2,3))
plot(common_EngIta,common_EngGerm)
abline(h=c(0,0.3),v=c(0,0.3),lty=2,col = "dark red")
abline(a=0,b=1,lty=2,col = "dark red")
plot(common_EngIta,common_GermAus)
abline(h=c(0,0.3),v=c(0,0.3),lty=2,col = "dark red")
abline(a=0,b=1,lty=2,col = "dark red")
plot(common_EngIta,common_ItaAus)
abline(h=c(0,0.3),v=c(0,0.3),lty=2,col = "dark red")
abline(a=0,b=1,lty=2,col = "dark red")
plot(common_EngGerm,common_GermAus)
abline(h=c(0,0.3),v=c(0,0.3),lty=2,col = "dark red")
abline(a=0,b=1,lty=2,col = "dark red")
plot(common_EngGerm,common_ItaAus)
abline(h=c(0,0.3),v=c(0,0.3),lty=2,col = "dark red")
abline(a=0,b=1,lty=2,col = "dark red")
plot(common_GermAus,common_ItaAus)
abline(h=c(0,0.3),v=c(0,0.3),lty=2,col = "dark red")
abline(a=0,b=1,lty=2,col = "dark red")
# Largest differences
low_EngGerm <- lower.tri(common_EngGerm)
common_EngGerm[!low_EngGerm] <- NA
common_GermAus[!(lower.tri(common_GermAus))] <- NA
common_ItaAus[!(lower.tri(common_ItaAus))] <- NA
common_EngIta[!(lower.tri(common_EngIta))] <- NA
mat <- data.frame(common_EngGerm=c(common_EngGerm),
common_EngIta=c(common_EngIta),
common_GermAus = c(common_GermAus),
common_ItaAus = c(common_ItaAus),
compare = paste(rownames(common_EngGerm),colnames(common_EngGerm),sep=".")) %>%
filter(!is.na(common_EngGerm)) %>%
separate(compare,into=c("item1","variable1","item2","variable2"),remove=FALSE,sep="[.]") %>%
unite(group,variable1,variable2,sep=".")
library(ggrepel)
ggplot(mat,aes(x=common_EngGerm,y=common_GermAus,label=compare,colour=group)) + geom_point(alpha=0.5,size=0.8) + geom_text(size=2) +
geom_hline(yintercept=c(0,0.3),linetype="dotted",colour="dark red") +
geom_vline(xintercept=c(0,0.3),linetype="dotted",colour="dark red")
ggplot(mat,aes(x=common_EngGerm,y=common_ItaAus,label=compare,colour=group)) + geom_point(alpha=0.5,size=0.8) + geom_text(size=2) +
geom_hline(yintercept=c(0,0.3),linetype="dotted",colour="dark red") +
geom_vline(xintercept=c(0,0.3),linetype="dotted",colour="dark red")
ggplot(mat,aes(x=common_EngGerm,y=common_EngIta,label=compare,colour=group)) + geom_point(alpha=0.5,size=0.8) + geom_text(size=2) +
geom_hline(yintercept=c(0,0.3),linetype="dotted",colour="dark red") +
geom_vline(xintercept=c(0,0.3),linetype="dotted",colour="dark red")
sets <- list(id.var=likert_variables1[grep("\\.id1$",likert_variables1)],
ought.var=likert_variables1[grep("\\.ought1$",likert_variables1)],
intr.var=likert_variables1[grep("\\.intr1$",likert_variables1)],
instru.var=likert_variables1[grep("\\.instru1$",likert_variables1)],
integr1.var=likert_variables1[grep("\\.integr1$",likert_variables1)],
prof.var=likert_variables1[grep("\\.prof1$",likert_variables1)],
post.var=likert_variables1[grep("\\.post1$",likert_variables1)],
comm.var=likert_variables1[grep("\\.comm1$",likert_variables1)])
get_alpha <- function(dataMot,
var=sets$id.var){
var_alpha <- alpha(dataMot[,var])
dataf <- data.frame(alpha=var_alpha$total,
drop = var_alpha$alpha.drop)
rownames(dataf) <- rownames(var_alpha$alpha.drop)
return(dataf)
}
# "Italian in Australia"
ita_in_au <- do.call(rbind,lapply(sets,function(x) {
get_alpha(data=filtered_conv[filtered_conv$Context == "Italian in Australia",],
var=x)}))
ita_in_au$var <- sapply(strsplit(rownames(ita_in_au),split="\\."),function(x) x[1])
ita_in_au$var.full <- sapply(strsplit(rownames(ita_in_au),split="\\."),function(x) x[3])
ita_in_au$Context <- "Italian in Australia"
rownames(ita_in_au) <- NULL
# "German in Australia"
germ_in_au <- do.call(rbind,lapply(sets,function(x) {
get_alpha(data=filtered_conv[filtered_conv$Context == "German in Australia",],
var=x)}))
germ_in_au$var <- sapply(strsplit(rownames(germ_in_au),split="\\."),function(x) x[1])
germ_in_au$var.full <- sapply(strsplit(rownames(germ_in_au),split="\\."),function(x) x[3])
germ_in_au$Context <- "German in Australia"
rownames(germ_in_au) <- NULL
# "English in Germany"
eng_in_germ <- do.call(rbind,lapply(sets[!(names(sets) %in% "comm.var")],function(x) {
get_alpha(data=filtered_conv[filtered_conv$Context == "English in Germany",],
var=x)}))
# the ones that makes issues
get_alpha(data=filtered_conv[filtered_conv$Context == "English in Germany",],
var=sets$ought.var)
eng_in_germ$var <- sapply(strsplit(rownames(eng_in_germ),split="\\."),function(x) x[1])
eng_in_germ$var.full <- sapply(strsplit(rownames(eng_in_germ),split="\\."),function(x) x[3])
eng_in_germ$Context <- "English in Germany"
rownames(eng_in_germ) <- NULL
# "English in Italy"
eng_in_ita <- do.call(rbind,lapply(sets[!(names(sets) %in% "comm.var")],function(x) {
get_alpha(data=filtered_conv[filtered_conv$Context == "English in Italy",],
var=x)}))
eng_in_ita$var <- sapply(strsplit(rownames(eng_in_ita),split="\\."),function(x) x[1])
eng_in_ita$var.full <- sapply(strsplit(rownames(eng_in_ita),split="\\."),function(x) x[3])
eng_in_ita$Context <- "English in Italy"
rownames(eng_in_ita) <- NULL
# combine
full_alpha <- rbind(eng_in_ita,eng_in_germ,germ_in_au,ita_in_au)
full_alpha %>% group_by(Context,var) %>%
summarise(st.alpha = unique(alpha.std.alpha),
G6=unique(alpha.G6.smc.)) %>%
ggplot(.,aes(x=var,y=st.alpha,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme_bw()
all_melt <- all_melt %>% separate(variable,into=c("item","type"),sep="\\.",remove=FALSE)
p1=ggplot(all_melt,aes(x=variable,fill=value)) + geom_bar(position = "stack") +
facet_grid(Context~type,scales = "free") + ggtitle("Filtered dataset")+theme(axis.text.x = element_text(angle = 45, hjust = 1),axis.text=element_text(size=8))+theme_bw()
p2=ggplot(full_alpha,aes(x=var.full,y=drop.std.alpha,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme_bw() + facet_wrap(~var,scales="free")
p4=ggplot(full_alpha,aes(x=var.full,y=drop.average_r,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme_bw() + facet_wrap(~var,scales="free")
p3=full_alpha %>% group_by(Context,var) %>%
summarise(st.alpha = unique(alpha.std.alpha),
G6=unique(alpha.G6.smc.)) %>%
ggplot(.,aes(x=var,y=st.alpha,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme(axis.text.x = element_text(angle = 45, hjust = 1),axis.text=element_text(size=8)) + theme_bw()
cowplot::plot_grid(p2,p3,nrow=2)